Subset macrophages
unique(cell_type)
## [1] "Macrophages intermediate" "Proliferating"
## [3] "Infiltrating Type 1" "Macrophages Repair"
## [5] "DCs injury" "cDC1"
## [7] "Infiltrating Type 2" "mregDCs"
## [9] "Unknown 1" "T Cells"
## [11] "CD11a Myeloid" "DCs residentes"
## [13] "Macrophages resident" "Eosinophils"
## [15] "B Cells" "Fibrocytes"
## [17] "ILC2" "Granulocytes"
## [19] "Unknown 2" "NK Cells"
## [21] "Plasma cells"
keep_macs <- c("Macrophages intermediate", "Macrophages Repair", "Infiltrating Type 2",
"Infiltrating Type 1", "CD11a Myeloid", "Macrophages resident" )
srt_mac <- srt[, srt$cell_type %in% keep_macs ]
srt_mac
## An object of class Seurat
## 16864 features across 29535 samples within 1 assay
## Active assay: RNA (16864 features, 3000 variable features)
## 3 dimensional reductions calculated: pca, umap, tsne
table(srt_mac$cell_type)
##
## CD11a Myeloid Infiltrating Type 1 Infiltrating Type 2
## 339 5579 3924
## Macrophages intermediate Macrophages Repair Macrophages resident
## 9304 5873 4516
mac_colors <- c("darkorange4","red", "yellow", "green4", "blue", "navy" )
DimPlot(srt_mac, reduction = "tsne", group.by = "cell_type", cols = colors <- mac_colors, pt.size = 1)

srt_macs <- srt[,srt$cell_type %in% keep_macs]
srt_macs
## An object of class Seurat
## 16864 features across 29535 samples within 1 assay
## Active assay: RNA (16864 features, 3000 variable features)
## 3 dimensional reductions calculated: pca, umap, tsne
sce <- readRDS("/mnt/nmorais-nfs/marta/pB_joana/pC_data/sce_all-samples-after-qc.rds")
sce$cell_type <- srt$cell_type
sce_macs <- sce[, sce$cell_type %in% keep_macs]
assays(sce_macs)$logcounts <- NULL
assays(sce_macs)$limma <- NULL
sce_macs
## class: SingleCellExperiment
## dim: 16864 29535
## metadata(0):
## assays(1): counts
## rownames(16864): Xkr4 Mrpl15 ... CAAA01147332.1 AC149090.1
## rowData names(0):
## colnames(29535): AAACCCAAGAGAGGGC-1 AAACCCAAGCACGTCC-1 ...
## TTTGTTGGTCTGATAC-1 TTTGTTGTCGCCGAGT-1
## colData names(18): total_counts log10_total_counts ... doublet
## cell_type
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
mat <- assays(sce_macs)$counts
gene_sum <- rowSums(mat)
gene_sum[1:5]
## Xkr4 Mrpl15 Lypla1 Tcea1 Rgs20
## 175 11654 12032 39067 300
summary(gene_sum)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 380 2678 20860 12373 11991799
dim(mat)
## [1] 16864 29535
mat <- mat[gene_sum > 0,]
dim(mat)
## [1] 16855 29535
sce_macs <- sce_macs[gene_sum > 0,]
sce_macs
## class: SingleCellExperiment
## dim: 16855 29535
## metadata(0):
## assays(1): counts
## rownames(16855): Xkr4 Mrpl15 ... CAAA01147332.1 AC149090.1
## rowData names(0):
## colnames(29535): AAACCCAAGAGAGGGC-1 AAACCCAAGCACGTCC-1 ...
## TTTGTTGGTCTGATAC-1 TTTGTTGTCGCCGAGT-1
## colData names(18): total_counts log10_total_counts ... doublet
## cell_type
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
total_counts <- colSums(mat)
total_counts[1:5]
## AAACCCAAGAGAGGGC-1 AAACCCAAGCACGTCC-1 AAACCCACAATTGTGC-1 AAACCCAGTTAAACAG-1
## 10496 29521 28294 7699
## AAACCCATCATACAGC-1
## 13169
total_features <- mat > 0
total_features <- colSums(total_features)
total_features[1:5]
## AAACCCAAGAGAGGGC-1 AAACCCAAGCACGTCC-1 AAACCCACAATTGTGC-1 AAACCCAGTTAAACAG-1
## 3185 5187 5353 2592
## AAACCCATCATACAGC-1
## 3494
logmat <- log2(mat+1)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.7 GiB
df1 <- data.frame(total_counts = total_counts,
total_features = total_features,
sample = sce_macs$sample,
sorting_day = sce_macs$sorting_day,
lane = sce_macs$lane)
df2 <- data.frame(log2_total_counts = log2(total_counts+1),
log2_total_features = log2(total_features+1),
sample = sce_macs$sample)
varMatrix <- getVarianceExplained(logmat, variables = df1)
plotExplanatoryVariables(
varMatrix,
variables = variables) +
ggtitle("Macrophages before normalization") +
theme(text = element_text(size=20)) +
scale_color_manual(values=c( "dodgerblue" ,"purple" , "green3","orange", "blue"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2 rows containing non-finite values (`stat_density()`).

make_barplot <- function(df, x, y, title, ylab){
ggplot(df, aes(x=x, y=y, fill=sample)) +
geom_boxplot(show.legend = FALSE) +
theme_bw() +
theme(text = element_text(size=20), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab(ylab) + xlab("Sample") +
#scale_fill_manual(values=sample_colors) +
ggtitle(title)
}
make_barplot(df1, df1$sample, df1$total_counts, "Macrophages before norm", "Library size")

make_barplot(df2, df2$sample, df2$log2_total_counts, "Macrophages before norm", "Library size (log2)")

make_barplot(df1, df1$sample, df1$total_features, "Macrophages before norm", "Total features")

make_barplot(df2, df2$sample, df2$log2_total_features, "Macrophages before norm", "Total features (log2)")

qclust <- quickCluster(sce_macs)
sce_macs <- computeSumFactors(sce_macs, clusters=qclust)
summary(sizeFactors(sce_macs))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1596 0.6644 0.9307 1.0000 1.2514 4.7143
sce_macs <- logNormCounts(sce_macs,
size_factors = sizeFactors(sce_macs),
log = TRUE,
pseudo_count = 1,
exprs_values = "counts"
)
mat <- assays(sce_macs)$logcounts
total_counts <- colSums((2**mat)-1)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.7 GiB
total_counts[1:5]
## AAACCCAAGAGAGGGC-1 AAACCCAAGCACGTCC-1 AAACCCACAATTGTGC-1 AAACCCAGTTAAACAG-1
## 11007.69 11972.51 10600.48 10973.16
## AAACCCATCATACAGC-1
## 11626.40
df3 <- data.frame(total_counts = total_counts,
sample = sce_macs$sample,
sorting_day = sce_macs$sorting_day,
lane = sce_macs$lane,
total_features = total_features)
df4 <- data.frame(
log2_total_counts = log2(total_counts),
log2_total_features = log2(total_features),
sample = sce_macs$sample,
sorting_day = sce_macs$sorting_day,
lane = sce_macs$lane)
make_barplot(df3, df3$sample, df3$total_counts, "Macrophages after norm", "Library size") #+ylim(0, 60000)

make_barplot(df4, df4$sample, df4$log2_total_counts, "Macrophages after norm", "Library size (log2)") +ylim(11,16)

normVarMatrix <- getVarianceExplained(assays(sce_macs)$logcounts, variables = df3[,c("total_counts", "total_features", "sample", "sorting_day", "lane")])
plotExplanatoryVariables(
normVarMatrix,
variables = variables) +
ggtitle("Macrophages after norm") +
theme(text = element_text(size=20)) +
scale_color_manual(values=c( "green3","purple" ,"orange", "dodgerblue" , "blue"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

saveRDS(sce_macs, "/mnt/nmorais-nfs/marta/pB_joana/pC_data/sce_macs_from_raw.rds")
norm_mat <- (2**assays(sce_macs)$logcounts)-1
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.7 GiB
ln_mat <- log(norm_mat + 1)
srt_subset <- CreateSeuratObject(counts = ln_mat, min.cells = 0, min.features = 0)
## Warning: Non-unique features (rownames) present in the input matrix, making
## unique
## Warning: Non-unique cell names (colnames) present in the input matrix, making
## unique
srt_subset
## An object of class Seurat
## 16855 features across 29535 samples within 1 assay
## Active assay: RNA (16855 features, 0 variable features)
srt_subset <- FindVariableFeatures(srt_subset, selection.method = "vst", nfeatures = 3000)
# Identify the 20 most highly variable genes
top20 <- head(VariableFeatures(srt_subset), 20)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(srt_subset)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

all.genes <- rownames(srt_subset)
srt_subset <- ScaleData(srt_subset, features = all.genes)
## Centering and scaling data matrix
srt_subset <- RunPCA(srt_subset, features = VariableFeatures(object = srt_subset), npcs = 100)
## PC_ 1
## Positive: Lgals3, Pkm, Clec4d, Thbs1, Tmsb10, S100a11, Clec4e, Cstb, Gapdh, Chil3
## Mdm2, S100a4, Rnh1, Fn1, S100a6, Nme2, F10, Vcan, Adam8, Anxa2
## Plac8, Capg, Prdx5, Pim1, Cd52, Txn1, Pgk1, Por, Ifitm3, Emilin2
## Negative: Selenop, Tanc2, Frmd4b, C1qa, Cd81, Pltp, Zfhx3, Igfbp4, Gas6, Zbtb20
## Serinc3, Fcgrt, Trf, C1qc, Lyve1, Zdhhc14, Cd163, Hspa1a, Hspa1b, Arhgef3
## Kitl, Zswim6, Mtss1, Abca9, Mgl2, C1qb, Timp2, Slc9a9, Glul, Zfp36l1
## PC_ 2
## Positive: Nfkb1, Rbpj, Cflar, Marcksl1, Ehd1, Ahnak, Cxcl2, Kdm6b, Plek, Nfkbia
## Nfkbiz, Zfp36, Eps8, F13a1, Cebpb, Nlrp3, Txnrd1, Ccl9, Birc3, Sept11
## Rapgef2, Pde4b, Cd14, Clec4e, Neat1, Tnf, Cdk14, Cfh, Ifrd1, Tnfsf9
## Negative: Ctss, Trem2, Ms4a7, Syngr1, Hexb, Psap, Lgals3bp, Apoe, Ctsh, Tmem86a
## Gpnmb, Itm2b, Mpeg1, Aif1, Gngt2, Ctsb, Ctsd, Abcg1, Bst2, Cd300c2
## Lst1, Pld3, Grn, Tyrobp, Fabp5, Rgs2, Creg1, Ptms, Cd68, Cd63
## PC_ 3
## Positive: Pf4, Ctsb, Cd63, Ctsd, Arhgap10, Emp1, Lgals1, Ctsl, Grn, Vat1
## Cd36, Spp1, Fabp5, C3ar1, Cd68, Stab1, Gpnmb, Folr2, Cstb, Dab2
## Trem2, Slc7a8, Slc6a8, Timp2, Syngr1, Nrp2, Pdpn, Mt1, Lhfpl2, Sash1
## Negative: Plbd1, Gpr141, Itgal, Napsa, Ifitm6, Cytip, Plac8, Gsr, Klra2, Sell
## Arhgap26, Cybb, Hp, Lsp1, Ccr2, Adgre5, Cd52, Adgre4, Fgr, Sorl1
## Nedd9, Mir142hg, Mcemp1, Arhgap15, Cd74, Sik3, Ace, Zfp710, Stap1, Parp8
## PC_ 4
## Positive: F13a1, Ccl6, S100a6, S100a4, S100a10, Ccl9, Gda, Lyve1, Anxa2, Chil3
## Cfp, Txnip, Cbr2, Plac8, Ifitm3, Ednrb, Vcan, Msr1, Lgals1, Capg
## Maf, Pdia6, Plekhg5, Fgfr1, Folr2, Stxbp6, S100a11, Hp, Dnm1, Itgam
## Negative: Cxcl16, Bcl2a1b, H2-Eb1, Ccrl2, H2-Aa, H2-Ab1, Cd83, Bcl2a1d, Cd74, Axl
## Rel, Dock10, Slamf7, Nlrp3, Nfkb1, Pde4b, Tlr2, Nfe2l2, H2-DMb1, Mthfs
## Tnfaip3, Tgfbr1, Tnip3, Ccl4, Tmem176b, Il1b, Arl5c, Tnfrsf1b, Tmem176a, H2-DMa
## PC_ 5
## Positive: Isg15, Ifi211, Rsad2, Ifit3, Irf7, Slfn5, Ifit2, Mndal, Ifi209, Ifi204
## Oasl2, Ifi203, Phf11d, Phf11b, Ifi47, Ifi213, Zbp1, Rnf213, Trim30a, Ifi205
## Usp18, Ifi206, Parp14, Fcgr1, A330040F15Rik, Ifit1, Slfn1, Slfn4, Ccl12, Oasl1
## Negative: Xylt1, Cytip, Gsr, Fyn, Ace, Pde3b, Havcr2, Cd9, Itgal, Treml4
## Mxi1, Gngt2, Smpdl3a, Mgst1, Gpcpd1, Runx2, Antxr2, Lyst, Msrb1, Fam117b
## Gcnt2, Zfyve9, Nedd9, Cd244a, Sorl1, F5, Pglyrp1, Klhl2, Stap1, Mrtfa
ElbowPlot(srt_subset, ndims = 100) +
geom_vline(xintercept = 25, color = "red", alpha = 0.5) +
theme(text = element_text(size = 10),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10))

srt_subset$sample <- sce_macs$sample
srt_subset$lane <- sce_macs$lane
srt_subset$sorting_day <- sce_macs$sorting_day
srt_subset$cell_type <- sce_macs$cell_type
DimPlot(srt_subset, reduction = "pca", group.by = "cell_type", cols = mac_colors)

srt_subset <- RunUMAP(srt_subset, n.components = 10, features = VariableFeatures(srt_subset))
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:45:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:45:37 Read 29535 rows and found 3000 numeric columns
## 16:45:37 Using Annoy for neighbor search, n_neighbors = 30
## 16:45:37 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:46:11 Writing NN index file to temp file /tmp/Rtmpc4sgW7/filedba455ec40f2
## 16:46:11 Searching Annoy index using 1 thread, search_k = 3000
## 16:53:04 Annoy recall = 100%
## 16:53:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:53:07 Initializing from normalized Laplacian + noise (using irlba)
## 16:53:08 Commencing optimization for 200 epochs, with 1608828 positive edges
## 16:53:37 Optimization finished
DimPlot(srt_subset, reduction = "umap", group.by = "cell_type", cols = mac_colors)

srt_subset <- RunTSNE(srt_subset,
dim.embed = 3,
dims = 1:25)
DimPlot(srt_subset, reduction = "tsne", group.by = "cell_type", cols = mac_colors)

saveRDS(srt_subset, "/mnt/nmorais-nfs/marta/pB_joana/pC_data/srt-mono-macs-from-raw.rds")
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] scater_1.22.0 scran_1.22.1
## [3] scuttle_1.4.0 SingleCellExperiment_1.16.0
## [5] SummarizedExperiment_1.24.0 Biobase_2.54.0
## [7] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1
## [9] IRanges_2.28.0 S4Vectors_0.32.4
## [11] BiocGenerics_0.40.0 MatrixGenerics_1.6.0
## [13] matrixStats_0.63.0 ggplot2_3.4.1
## [15] SeuratObject_4.1.3 Seurat_4.1.1
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.8 igraph_1.3.5
## [3] lazyeval_0.2.2 sp_1.6-0
## [5] splines_4.1.2 BiocParallel_1.28.3
## [7] listenv_0.9.0 scattermore_0.8
## [9] digest_0.6.31 htmltools_0.5.4
## [11] viridis_0.6.2 fansi_1.0.4
## [13] magrittr_2.0.3 ScaledMatrix_1.2.0
## [15] tensor_1.5 cluster_2.1.4
## [17] ROCR_1.0-11 limma_3.50.3
## [19] globals_0.16.2 spatstat.sparse_3.0-0
## [21] colorspace_2.1-0 ggrepel_0.9.3
## [23] xfun_0.37 dplyr_1.1.0
## [25] RCurl_1.98-1.10 jsonlite_1.8.4
## [27] progressr_0.13.0 spatstat.data_3.0-0
## [29] survival_3.5-0 zoo_1.8-11
## [31] glue_1.6.2 polyclip_1.10-4
## [33] gtable_0.3.1 zlibbioc_1.40.0
## [35] XVector_0.34.0 leiden_0.4.3
## [37] DelayedArray_0.20.0 BiocSingular_1.10.0
## [39] future.apply_1.10.0 abind_1.4-5
## [41] scales_1.2.1 edgeR_3.36.0
## [43] spatstat.random_3.1-3 miniUI_0.1.1.1
## [45] Rcpp_1.0.10 viridisLite_0.4.1
## [47] xtable_1.8-4 dqrng_0.3.0
## [49] reticulate_1.26 spatstat.core_2.4-4
## [51] rsvd_1.0.5 metapod_1.2.0
## [53] htmlwidgets_1.6.1 httr_1.4.4
## [55] RColorBrewer_1.1-3 ellipsis_0.3.2
## [57] ica_1.0-3 farver_2.1.1
## [59] pkgconfig_2.0.3 sass_0.4.5
## [61] uwot_0.1.14 deldir_1.0-6
## [63] locfit_1.5-9.7 utf8_1.2.3
## [65] labeling_0.4.2 tidyselect_1.2.0
## [67] rlang_1.0.6 reshape2_1.4.4
## [69] later_1.3.0 munsell_0.5.0
## [71] tools_4.1.2 cachem_1.0.6
## [73] cli_3.6.0 generics_0.1.3
## [75] ggridges_0.5.4 evaluate_0.20
## [77] stringr_1.5.0 fastmap_1.1.0
## [79] yaml_2.3.7 goftest_1.2-3
## [81] knitr_1.42 fitdistrplus_1.1-8
## [83] purrr_1.0.1 RANN_2.6.1
## [85] pbapply_1.7-0 future_1.31.0
## [87] nlme_3.1-162 sparseMatrixStats_1.6.0
## [89] mime_0.12 compiler_4.1.2
## [91] rstudioapi_0.14 beeswarm_0.4.0
## [93] plotly_4.10.1 png_0.1-8
## [95] spatstat.utils_3.0-1 statmod_1.5.0
## [97] tibble_3.1.8 bslib_0.4.2
## [99] stringi_1.7.12 highr_0.10
## [101] bluster_1.4.0 lattice_0.20-45
## [103] Matrix_1.5-3 vctrs_0.5.2
## [105] pillar_1.8.1 lifecycle_1.0.3
## [107] spatstat.geom_3.0-6 lmtest_0.9-40
## [109] jquerylib_0.1.4 BiocNeighbors_1.12.0
## [111] RcppAnnoy_0.0.20 data.table_1.14.6
## [113] cowplot_1.1.1 bitops_1.0-7
## [115] irlba_2.3.5.1 httpuv_1.6.8
## [117] patchwork_1.1.2 R6_2.5.1
## [119] promises_1.2.0.1 KernSmooth_2.23-20
## [121] gridExtra_2.3 vipor_0.4.5
## [123] parallelly_1.34.0 codetools_0.2-18
## [125] MASS_7.3-58.2 withr_2.5.0
## [127] sctransform_0.3.5 GenomeInfoDbData_1.2.7
## [129] mgcv_1.8-41 parallel_4.1.2
## [131] beachmat_2.10.0 grid_4.1.2
## [133] rpart_4.1.19 tidyr_1.3.0
## [135] rmarkdown_2.20 DelayedMatrixStats_1.16.0
## [137] Rtsne_0.16 shiny_1.7.4
## [139] ggbeeswarm_0.7.1